# ------------------------------------------------------------------------------#
# Description: Create Table 2: Correlation of instrument with explanatory variables
# Project:     Peer Effects in Voluntary Environmental Policies:
#              An Application to Urban Water Quality
# Author:      Daniel A. Brent | dab320@psu.edu
# Created:     2025-06-11
# ------------------------------------------------------------------------------#

# Clear environment ------------------------------------------------------------
rm(list = ls())
gc()

# Load required packages -------------------------------------------------------
library(pacman)
p_load(data.table, dplyr, stringr, modelsummary, fixest)
options(scipen = 999)

# Load and process adoption data -----------------------------------------------
load('data/adoptions/adoptions.RData')
adopt[, adopted := max(adopt.any, na.rm = TRUE), by = pin]

# Merge Peer Eligibility - CS --------------------------------------------------
load('data/bucket/peer_elig_buckets_cs.RData')
dat <- merge(adopt, peer.buckets.cs, by = c('pin', 'year'))
rm(peer.buckets.cs)

# Merge Peer Eligibility - RG --------------------------------------------------
load('data/bucket/peer_elig_buckets_rg.RData')
dat <- merge(dat, peer.buckets.rg, by = c('pin', 'year'))
rm(peer.buckets.rg)

# Create average peer eligibility ----------------------------------------------
dat[, elig.peers.avg.100 := (elig.peers.cs.100 + elig.peers.rg.100) / 2]
dat[, elig.peers.avg.20 := (elig.peers.cs.20 + elig.peers.rg.20) / 2]

# Merge total peers ------------------------------------------------------------
load('data/bucket/total_peers.RData')
dat <- merge(dat, total.peers, by = 'pin')
rm(total.peers)

# Merge parcel characteristics -------------------------------------------------
load('data/parcel_build/parcel_build.RData')
dat <- merge(dat, parcel.build[, .(pin, zip5, sub.area, sqft, lot, beds, baths,
                                   yearbuilt, val.land, val.improve,
                                   ren.pre90, ren.90.99, ren.00.09, ren.10,
                                   water.prob)], by = 'pin')
rm(parcel.build)

# Merge census characteristics -------------------------------------------------
load('data/census/all_parcels_census_2010.RData')
parcels.2010[, blk := as.numeric(paste0(block, blkgrp))]
dat <- merge(dat, parcels.2010[, .(pin, tract, blkgrp, block, blk)])
rm(parcels.2010)

# Create total assessed value and eligibility flags ----------------------------
dat[, assess.value := val.land + val.improve]
dat[, max.elig.cs := max(elig.cs), by = pin]
dat[, max.elig.rg := max(elig.rg), by = pin]

# Filter to 2020 and key variables ---------------------------------------------
bal <- dat[year == 2020, .(pin, elig.cs, elig.rg, max.elig.cs, max.elig.rg, adopted,
                           elig.peers.cs.100, elig.peers.cs.20,
                           elig.peers.rg.100, elig.peers.rg.20,
                           elig.peers.avg.100, elig.peers.avg.20,
                           sub.area, zip5, tract, blkgrp, blk,
                           sqft, lot, beds, baths, yearbuilt, assess.value,
                           ren.10, ren.00.09)]

# Rescale lot size and assessed value ------------------------------------------
bal[, lot1000 := lot / 1000]
bal[, assess1000 := assess.value / 1000]

# Merge census block group characteristics -------------------------------------
load('data/census/census_bg.RData')
census.2014[, blkgrp := as.numeric(paste0(tract, bg))]
census.2014[, active.trans := pub.trans + bike + walk]
census.2014[, degree := college + adv.degree]

bal <- merge(bal, census.2014[, .(blkgrp, med.inc, non.white, non.white.asian,
                                  active.trans, degree, own.occ, gov.assist)], by = 'blkgrp')
rm(census.2014)

# Standardize key property variables -------------------------------------------
bal[, sqft.sd        := scale(sqft)]
bal[, lot.sd         := scale(lot)]
bal[, beds.sd        := scale(beds)]
bal[, baths.sd       := scale(baths)]
bal[, yearbuilt.sd   := scale(yearbuilt)]
bal[, assess.sd      := scale(assess.value)]

# Panel (a): Property Balance Regressions -----------------------------------------------------------------------------

m1 <- feols(elig.peers.cs.100 ~ sqft.sd + lot.sd + beds.sd + baths.sd + yearbuilt.sd + assess.sd | blkgrp,
            cluster = 'blkgrp', data = bal[max.elig.cs == 1])

m2 <- feols(elig.peers.rg.100 ~ sqft.sd + lot.sd + beds.sd + baths.sd + yearbuilt.sd + assess.sd | blkgrp,
            cluster = 'blkgrp', data = bal[max.elig.cs == 1])

m3 <- feols(elig.peers.avg.100 ~ sqft.sd + lot.sd + beds.sd + baths.sd + yearbuilt.sd + assess.sd | blkgrp,
            cluster = 'blkgrp', data = bal[max.elig.cs == 1])

m4 <- feols(elig.peers.cs.100 ~ sqft.sd + lot.sd + beds.sd + baths.sd + yearbuilt.sd + assess.sd | blkgrp,
            cluster = 'blkgrp', data = bal[max.elig.rg == 1])

m5 <- feols(elig.peers.rg.100 ~ sqft.sd + lot.sd + beds.sd + baths.sd + yearbuilt.sd + assess.sd | blkgrp,
            cluster = 'blkgrp', data = bal[max.elig.rg == 1])

m6 <- feols(elig.peers.avg.100 ~ sqft.sd + lot.sd + beds.sd + baths.sd + yearbuilt.sd + assess.sd | blkgrp,
            cluster = 'blkgrp', data = bal[max.elig.rg == 1])

# Preview Panel (a)
etable(m1, m2, m3, m4, m5, m6,
       fixef_sizes = TRUE, depvar = FALSE, se.below = TRUE,
       headers = c("CS", "RG", "Avg.", "CS", "RG", "Avg."),
       extralines = list("-Eligibility" = c("Cistern", "Cistern", "Cistern",
                                            "Rain Garden", "Rain Garden", "Rain Garden")))

# Customize LaTeX output
style_tex <- style.tex(tablefoot.value = '', model.title = '',
                       depvar.title = '', var.title = '\\midrule',
                       stats.title = '\\midrule')

setFixest_etable(drop = "Constant",
                 fitstat = ~ n + ivfall + sargan + wh,
                 digits = 3, digits.stats = 2,
                 depvar = FALSE, style.tex = style_tex)

etable(m1, m2, m3, m4, m5, m6, tex = TRUE,
       fixef_sizes = TRUE, depvar = FALSE, se.below = TRUE,
       signif.code = c('*' = 0.1, '**' = 0.05, '***' = 0.01), digits = 'r2',
       headers = list("_Peer Eligibility" = c("CS", "RG", "Avg.", "CS", "RG", "Avg.")),
       extralines = list("-Eligibility" = c("Cistern", "Cistern", "Cistern",
                                            "Rain Garden", "Rain Garden", "Rain Garden")),
       file = "output/tables/table_2a.tex", replace = TRUE)

rm(m1, m2, m3, m4, m5, m6)

# Panel (b): Census Balance Regressions -----------------------------------------------------------------------------

# Standardize demographic variables
bal[, med.inc.sd       := scale(med.inc)]
bal[, non.white.sd     := scale(non.white)]
bal[, active.trans.sd  := scale(active.trans)]
bal[, degree.sd        := scale(degree)]
bal[, own.occ.sd       := scale(own.occ)]
bal[, gov.assist.sd    := scale(gov.assist)]

m1 <- feols(elig.peers.cs.100 ~ med.inc.sd + non.white.sd + active.trans.sd +
              degree.sd + own.occ.sd + gov.assist.sd | tract,
            cluster = "tract", data = bal[max.elig.cs == 1])

m2 <- feols(elig.peers.rg.100 ~ med.inc.sd + non.white.sd + active.trans.sd +
              degree.sd + own.occ.sd + gov.assist.sd | tract,
            cluster = "tract", data = bal[max.elig.cs == 1])

m3 <- feols(elig.peers.avg.100 ~ med.inc.sd + non.white.sd + active.trans.sd +
              degree.sd + own.occ.sd + gov.assist.sd | tract,
            cluster = "tract", data = bal[max.elig.cs == 1])

m4 <- feols(elig.peers.cs.100 ~ med.inc.sd + non.white.sd + active.trans.sd +
              degree.sd + own.occ.sd + gov.assist.sd | tract,
            cluster = "tract", data = bal[max.elig.rg == 1])

m5 <- feols(elig.peers.rg.100 ~ med.inc.sd + non.white.sd + active.trans.sd +
              degree.sd + own.occ.sd + gov.assist.sd | tract,
            cluster = "tract", data = bal[max.elig.rg == 1])

m6 <- feols(elig.peers.avg.100 ~ med.inc.sd + non.white.sd + active.trans.sd +
              degree.sd + own.occ.sd + gov.assist.sd | tract,
            cluster = "tract", data = bal[max.elig.rg == 1])

# Preview Panel (b)
etable(m1, m2, m3, m4, m5, m6)

varnames <- c('tract' = 'Tract',
              'med.inc.sd'      = 'Median Income',
              'non.white.sd'    = 'Non-white',
              'active.trans.sd' = 'Active Trans.',
              'degree.sd'       = 'Degree',
              'own.occ.sd'      = 'Owner Occupied',
              'gov.assist.sd'   = 'Gov. Assistance')

etable(m1, m2, m3, m4, m5, m6, tex = TRUE,
       fixef_sizes = TRUE, depvar = FALSE, se.below = TRUE,
       signif.code = c('*' = 0.1, '**' = 0.05, '***' = 0.01), digits = 'r2',
       fitstat = c("n"),
       headers = list("_Peer Eligibility" = c("CS", "RG", "Avg.", "CS", "RG", "Avg.")),
       extralines = list("-Eligibility" = c("Cistern", "Cistern", "Cistern",
                                            "Rain Garden", "Rain Garden", "Rain Garden")),
       dict = varnames,
       file = "output/tables/table_2b.tex", replace = TRUE)

